# -*- coding: utf-8 -*-
import os
import re

import requests
from lxml import etree

def get_data():
    """Scrape draw results from kaijiang.500.com and append them to log files.

    The index page ``http://kaijiang.500.com/ssq.shtml`` is fetched and every
    draw-page URL found in it is visited in turn. On each draw page the text
    of the ``<li class="ball_red">`` and ``<li class="ball_blue">`` items
    (direct children of a ``<ul>``) is extracted.

    A draw is recorded only when the page yields exactly six red values and
    exactly one blue value. A recorded draw is printed as a seven-element
    list, and each value is appended, followed by a comma, to its own file:
    ``ball/red_ball_1.log`` ... ``ball/red_ball_6.log`` and
    ``ball/blue_ball.log``. The ``ball`` directory is created if missing.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched,
            including when a request exceeds the timeout.
    """
    index_url = 'http://kaijiang.500.com/ssq.shtml'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
    }
    # Without a timeout a stalled connection would block the script forever.
    timeout_seconds = 10

    index_page = requests.get(index_url, headers=headers, timeout=timeout_seconds)
    index_page.encoding = 'gb2312'
    # Raw string so ``\d`` is a regex escape rather than an invalid string
    # escape; literal dots are escaped so they match only ".".
    draw_urls = re.findall(
        r'(http://kaijiang\.500\.com/shtml/ssq/\d.*?\.shtml)',
        index_page.text,
    )

    # One log file per ball position: six red balls followed by the blue ball.
    log_paths = ['ball/red_ball_%d.log' % position for position in range(1, 7)]
    log_paths.append('ball/blue_ball.log')
    # Opening a file in append mode does not create missing parent directories.
    os.makedirs('ball', exist_ok=True)

    for draw_url in draw_urls:
        draw_page = requests.get(draw_url, headers=headers, timeout=timeout_seconds)

        # Parse the document once and run both queries against the same tree.
        tree = etree.HTML(draw_page.text)
        red_balls = tree.xpath('//ul/li[@class="ball_red"]/text()')
        blue_balls = tree.xpath('//ul/li[@class="ball_blue"]/text()')

        # Skip incomplete pages *before* writing anything, so the seven log
        # files never get out of step with each other.
        if len(red_balls) != 6 or len(blue_balls) != 1:
            continue

        ball_list = list(red_balls) + [blue_balls[0]]
        print(ball_list)
        for log_path, ball in zip(log_paths, ball_list):
            with open(log_path, 'a') as f:
                f.write(ball + ',')


# Run the scraper only when this file is executed as a script, not on import.
if "__main__" == __name__:
    get_data()


